This script analyzes filtered mAb escape data¶
In [1]:
# this cell is tagged as parameters for `papermill` parameterization
#configs
altair_config = None
nipah_config = None
#inputs
binding_data = None
HENV103_filter = None
HENV117_filter = None
HENV26_filter = None
HENV32_filter = None
m102_filter = None
nAH1_filter = None
func_scores_low_effect_E3_file = None
concat_df_file = None
#outputs
escape_bubble_plot = None
bubble_1_mut_plot = None
mab_line_escape_plot = None
aggregate_mab_and_binding = None
aggregate_mab_and_niv_polymorphism = None
mab_plot_top = None
mab_plot_all = None
In [2]:
# Parameters
nipah_config = "nipah_config.yaml"
altair_config = "data/custom_analyses_data/theme.py"
HENV103_filter = "results/filtered_data/escape/HENV103_escape_filtered.csv"
HENV117_filter = "results/filtered_data/escape/HENV117_escape_filtered.csv"
HENV26_filter = "results/filtered_data/escape/HENV26_escape_filtered.csv"
HENV32_filter = "results/filtered_data/escape/HENV32_escape_filtered.csv"
m102_filter = "results/filtered_data/escape/m102_escape_filtered.csv"
nAH1_filter = "results/filtered_data/escape/nAH1_escape_filtered.csv"
func_scores_low_effect_E3_file = (
"results/filtered_data/escape/e3_low_mab_effect_filter.csv"
)
concat_df_file = "results/filtered_data/escape/mab_filter_concat.csv"
binding_data = "results/filtered_data/binding/e2_binding_filtered.csv"
escape_bubble_plot = "results/images/escape_bubble_plot.html"
bubble_1_mut_plot = "results/images/escape_bubble_1_mut_plot.html"
overlap_escape_plot = "results/images/overlap_escape_plot.html"
mab_line_escape_plot = "results/images/mab_line_escape_plot.html"
mab_plot_top = "results/images/mab_plot_top.html"
mab_plot_all = "results/images/mab_plot_all.html"
aggregate_mab_and_binding = "results/images/aggregate_mab_and_binding.html"
aggregate_mab_and_niv_polymorphism = (
"results/images/aggregate_mab_and_niv_polymorphism.html"
)
In [3]:
if altair_config is None:
print("this is being run manually")
else:
print("papermill!")
papermill!
In [4]:
import math
import os
import re
import altair as alt
import numpy as np
import pandas as pd
import scipy.stats
import Bio.SeqIO
import yaml
import matplotlib
matplotlib.rcParams["svg.fonttype"] = "none"
from Bio import PDB
import dmslogo
from dmslogo.colorschemes import CBPALETTE
from dmslogo.colorschemes import ValueToColorMap
In [5]:
# allow more rows for Altair
_ = alt.data_transformers.disable_max_rows()
if (
os.getcwd()
== "/fh/fast/bloom_j/computational_notebooks/blarsen/2023/Nipah_Malaysia_RBP_DMS/"
):
pass
print("Already in correct directory")
else:
os.chdir(
"/fh/fast/bloom_j/computational_notebooks/blarsen/2023/Nipah_Malaysia_RBP_DMS/"
)
print("Setup in correct directory")
Setup in correct directory
For running interactively¶
In [6]:
if nipah_config is None:
print("this notebook is being run manually")
#configs
altair_config = "data/custom_analyses_data/theme.py"
nipah_config = "nipah_config.yaml"
#input
HENV103_filter = "results/filtered_data/escape/HENV103_escape_filtered.csv"
HENV117_filter = "results/filtered_data/escape/HENV117_escape_filtered.csv"
HENV26_filter = "results/filtered_data/escape/HENV26_escape_filtered.csv"
HENV32_filter = "results/filtered_data/escape/HENV32_escape_filtered.csv"
m102_filter = "results/filtered_data/escape/m102_escape_filtered.csv"
nAH1_filter = "results/filtered_data/escape/nAH1_escape_filtered.csv"
func_scores_low_effect_E3_file = "results/filtered_data/escape/e3_low_mab_effect_filter.csv"
concat_df_file = "results/filtered_data/escape/mab_filter_concat.csv"
binding_data = "results/filtered_data/binding/e2_binding_filtered.csv"
# escape_bubble_plot = 'results/images/escape_bubble_plot.html'
# bubble_1_mut_plot = 'results/images/escape_bubble_1_mut_plot.html'
# overlap_escape_plot = 'results/images/overlap_escape_plot.html'
# m102_heat = 'results/images/m102_heatmap.html'
# HENV26_heat = 'results/images/HENV26_heatmap.html'
# HENV32_heat = 'results/images/HENV32_heatmap.html'
# nAH1_heat = 'results/images/nAH1_heatmap.html'
# HENV117_heat = 'results/images/HENV117_heatmap.html'
# HENV103_heat = 'results/images/HENV103_heatmap.html'
else:
print("this notebook was run automatically")
this notebook was run automatically
In [7]:
if altair_config:
with open(altair_config, "r") as file:
exec(file.read())
with open(nipah_config) as f:
config = yaml.safe_load(f)
Load data¶
In [8]:
func_scores_E3_low_effect = pd.read_csv(func_scores_low_effect_E3_file)
combined_df = pd.read_csv(concat_df_file)
In [9]:
display(combined_df)
| site | wildtype | mutant | mutation | escape_mean | escape_median | escape_std | times_seen_ab | frac_models | effect | effect_std | times_seen_func_effects | ab | top_escape | wildtype_site | wt_type | mutant_type | wt_codon | closest_mutant_codon | min_mutations | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 71 | Q | C | Q71C | -0.14 | -0.14 | 0.20 | 3.00 | 0.67 | -0.72 | 0.78 | 3.00 | HENV-103 | False | Q71 | hydrophilic | special | CAA | NaN | 3 |
| 1 | 71 | Q | D | Q71D | -0.01 | -0.00 | 0.06 | 3.33 | 1.00 | -0.39 | 0.64 | 3.43 | HENV-103 | False | Q71 | hydrophilic | negative | CAA | GAC | 2 |
| 2 | 71 | Q | E | Q71E | -0.08 | -0.03 | 0.12 | 3.33 | 1.00 | -0.25 | 0.98 | 4.57 | HENV-103 | False | Q71 | hydrophilic | negative | CAA | GAA | 1 |
| 3 | 71 | Q | F | Q71F | 0.02 | 0.01 | 0.01 | 2.33 | 1.00 | -0.50 | 0.31 | 3.29 | HENV-103 | False | Q71 | hydrophilic | aromatic | CAA | NaN | 3 |
| 4 | 71 | Q | G | Q71G | 0.04 | 0.04 | 0.03 | 3.33 | 1.00 | -1.33 | 0.83 | 4.71 | HENV-103 | False | Q71 | hydrophilic | special | CAA | GGA | 2 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 42160 | 602 | T | R | T602R | -0.01 | -0.02 | 0.00 | 7.33 | 1.00 | 0.41 | 0.18 | 6.29 | nAH1.3 | False | T602 | hydrophilic | positive | ACA | AGA | 1 |
| 42161 | 602 | T | S | T602S | 0.19 | 0.16 | 0.23 | 3.67 | 1.00 | 0.38 | 0.16 | 3.43 | nAH1.3 | False | T602 | hydrophilic | hydrophilic | ACA | TCA | 1 |
| 42162 | 602 | T | V | T602V | 0.10 | 0.13 | 0.08 | 6.00 | 1.00 | 0.41 | 0.13 | 5.43 | nAH1.3 | False | T602 | hydrophilic | hydrophobic | ACA | GTA | 2 |
| 42163 | 602 | T | W | T602W | 0.09 | 0.07 | 0.09 | 6.33 | 1.00 | 0.52 | 0.10 | 6.86 | nAH1.3 | False | T602 | hydrophilic | aromatic | ACA | NaN | 3 |
| 42164 | 602 | T | Y | T602Y | 0.21 | 0.25 | 0.10 | 6.67 | 1.00 | 0.41 | 0.24 | 6.14 | nAH1.3 | False | T602 | hydrophilic | aromatic | ACA | NaN | 3 |
42165 rows × 20 columns
Filtering parameters¶
In [10]:
def find_top_sites(df,ab):
tmp_df = df[df['ab'] == ab] #filter
top_escape_df = tmp_df.query('top_escape == True') #find max escape sites
top_escape_df.loc[top_escape_df['escape_mean'] < 0.25, 'escape_mean'] = 0
sum_site_list = top_escape_df['site'].unique().tolist()
print(sum_site_list)
return sum_site_list
#call the function and make site list for each mAb
m102_sites = find_top_sites(combined_df,'m102.4')
HENV26_sites = find_top_sites(combined_df,'HENV-26')
HENV117_sites = find_top_sites(combined_df,'HENV-117')
HENV103_sites = find_top_sites(combined_df,'HENV-103')
HENV32_sites = find_top_sites(combined_df,'HENV-32')
nAH1_sites = find_top_sites(combined_df,'nAH1.3')
#make combined lists of antibodies depending on epitope
binding_face_list = list(set(m102_sites + HENV26_sites + HENV117_sites))
dimer_face_list = list(set(HENV103_sites + HENV32_sites))
everything_list = list(set(m102_sites + HENV26_sites + HENV117_sites + HENV103_sites + HENV32_sites + nAH1_sites))
[239, 507, 559, 577, 582, 586, 589] [491, 494, 501, 529, 530] [171, 258, 555, 580, 582, 583, 586, 587] [205, 258, 259, 260, 264] [199, 200, 205] [184, 185, 186, 187, 188, 189, 190, 447, 448, 449, 450, 451, 452, 468, 513, 515, 516, 517, 518, 519, 520, 597]
make dataframes for making logo plots¶
In [11]:
def find_sites_for_ab(df,site_list,ab_list,color_by):
tmp_df = combined_df[combined_df['site'].isin(site_list)]
tmp_df = tmp_df[tmp_df['ab'].isin(ab_list)]
tmp_df.loc[tmp_df['escape_mean'] < 0.25, 'escape_mean'] = 0
#Add wildtype residue info for logo plot
tmp_df['wildtype_site'] = tmp_df['wildtype'].astype(str) + tmp_df['site'].astype(str)
#Find colors based on effect
tmp_df['clip'] = np.clip(tmp_df[color_by], None, 0)
min_prop = tmp_df[color_by].min()
max_prop = tmp_df['clip'].max()
map1 = ValueToColorMap(minvalue=min_prop, maxvalue=max_prop, cmap='YlGn')
tmp_df['color'] = tmp_df['clip'].map(map1.val_to_color)
return tmp_df
everything_df = find_sites_for_ab(combined_df,everything_list,['m102.4','HENV-26','HENV-117','HENV-32','HENV-103','nAH1.3'],'effect')
custom_order = ['m102.4', 'HENV-117', 'HENV-26','HENV-103','HENV-32','nAH1.3']
everything_df['ab'] = pd.Categorical(everything_df['ab'], categories=custom_order, ordered=True)
everything_df = everything_df.sort_values(by='ab')
# get information for binding face antibodies
binding_face_df = find_sites_for_ab(combined_df,binding_face_list,['m102.4','HENV-26','HENV-117'],'effect')
#Want to put them in a specific order so do the following:
custom_order = ['m102.4', 'HENV-117', 'HENV-26']
binding_face_df['ab'] = pd.Categorical(binding_face_df['ab'], categories=custom_order, ordered=True)
binding_face_df = binding_face_df.sort_values(by='ab')
# get dimer face information
dimer_face_df = find_sites_for_ab(combined_df,dimer_face_list,['HENV-103','HENV-32'],'effect')
# get information about nAH1.3
nAH1_df = find_sites_for_ab(combined_df,nAH1_sites,['nAH1.3'],'effect')
#make a df for sites that have opposite effects on mab escape
custom_order=['m102.4', 'HENV-117', 'HENV-26','HENV-103','HENV-32']
fusion_trig_df = find_sites_for_ab(combined_df,[165,166,167,168,169,170,171,172,204,208,209,270,589],['m102.4','HENV-117','HENV-26','HENV-32','HENV-103'],'effect')
fusion_trig_df['ab'] = pd.Categorical(fusion_trig_df['ab'], categories=custom_order, ordered=True)
fusion_trig_df = fusion_trig_df.sort_values(by='ab')
make the facet logo plots¶
In [12]:
def generate_facet_logo_plot(df,output_file_name):
"""Generate logo plot and save as a file."""
draw_logo_kwargs={
"letter_col": "mutant",
"color_col": "color",
"xtick_col": "wildtype_site",
"letter_height_col": "escape_mean",
"xlabel": "",
"clip_negative_heights": True,
}
fig, ax = dmslogo.facet_plot(
data=df,
x_col='site',
gridrow_col='ab',
share_ylim_across_rows=False,
show_col=None,
draw_logo_kwargs=draw_logo_kwargs,
)
fig
fig.savefig(output_file_name, bbox_inches='tight',format='svg')
generate_facet_logo_plot(binding_face_df,'results/images/logo_plots/binding_face_escape.svg')
generate_facet_logo_plot(binding_face_df.query('min_mutations == 1'),'results/images/logo_plots/binding_face_escape_1_mutant_away.svg')
generate_facet_logo_plot(dimer_face_df,'results/images/logo_plots/dimer_face_escape.svg')
generate_facet_logo_plot(dimer_face_df.query('min_mutations ==1'),'results/images/logo_plots/dimer_face_escape_1_mutant_away.svg')
generate_facet_logo_plot(nAH1_df,'results/images/logo_plots/nAH1_escape_solo.svg')
generate_facet_logo_plot(nAH1_df.query('min_mutations ==1'),'results/images/logo_plots/nAH1_escape_solo_1_mutant_away.svg')
In [13]:
def generate_facet_logo_plot_binding(df,output_file_name):
"""Generate logo plot and save as a file."""
draw_logo_kwargs={
"letter_col": "mutant",
"color_col": "color",
"xtick_col": "wildtype_site",
"letter_height_col": "escape_mean",
"xlabel": "",
"clip_negative_heights": False,
}
fig, ax = dmslogo.facet_plot(
data=df,
x_col='site',
gridrow_col='ab',
share_ylim_across_rows=False,
show_col=None,
draw_logo_kwargs=draw_logo_kwargs,
)
fig
fig.savefig(output_file_name, bbox_inches='tight',format='svg')
generate_facet_logo_plot_binding(fusion_trig_df,'results/images/logo_plots/fusion_trig.svg')
In [14]:
def find_combined_sites(list1, list2):
combined_list = list(list1) + list(list2)
unique_elements = set(combined_list)
sorted_list = sorted(unique_elements)
return sorted_list
def find_top_mean_escape_sites(df):
for ab in ['m102.4','HENV-26','HENV-117','HENV-103','HENV-32','nAH1.3']:
# select specific antibody
tmp_df = df[df['ab'] == ab]
# find max escape sites
max_df = tmp_df.sort_values(by='escape_mean',ascending=False).head(10)
max_sites = list(max_df['site'].unique())
print(f'For {ab}, max escape sites are :{max_sites}')
# find top summed escape sites
agg_df = tmp_df.groupby(['site', 'ab'])['escape_mean'].sum().reset_index()
tmp_df_sort = agg_df.sort_values(by='escape_mean',ascending=False).head(10)
summed_sites = list(tmp_df_sort['site'].unique())
print(f'For {ab}, summed escape sites are: {summed_sites}')
combined_list = find_combined_sites(max_sites,summed_sites)
print(f'The combined list for {ab} is: {combined_list}\n')
find_top_mean_escape_sites(combined_df)
For m102.4, max escape sites are :[239, 559, 589, 582, 507, 577, 586] For m102.4, summed escape sites are: [582, 239, 589, 586, 555, 270, 172, 542, 577, 305] The combined list for m102.4 is: [172, 239, 270, 305, 507, 542, 555, 559, 577, 582, 586, 589] For HENV-26, max escape sites are :[491, 530, 501, 494, 529] For HENV-26, summed escape sites are: [530, 491, 501, 529, 171, 257, 172, 208, 589, 167] The combined list for HENV-26 is: [167, 171, 172, 208, 257, 491, 494, 501, 529, 530, 589] For HENV-117, max escape sites are :[583, 587, 586, 258, 580, 555] For HENV-117, summed escape sites are: [580, 586, 582, 171, 587, 589, 257, 208, 204, 555] The combined list for HENV-117 is: [171, 204, 208, 257, 258, 555, 580, 582, 583, 586, 587, 589] For HENV-103, max escape sites are :[264, 258, 260, 259] For HENV-103, summed escape sites are: [205, 260, 258, 259, 264, 277, 274, 275, 151, 509] The combined list for HENV-103 is: [151, 205, 258, 259, 260, 264, 274, 275, 277, 509] For HENV-32, max escape sites are :[199, 200] For HENV-32, summed escape sites are: [199, 200, 534, 596, 205, 277, 274, 275, 509, 554] The combined list for HENV-32 is: [199, 200, 205, 274, 275, 277, 509, 534, 554, 596] For nAH1.3, max escape sites are :[468, 517, 450, 185] For nAH1.3, summed escape sites are: [468, 185, 517, 450, 447, 516, 189, 184, 188, 518] The combined list for nAH1.3 is: [184, 185, 188, 189, 447, 450, 468, 516, 517, 518]
In [15]:
def identify_escape_sites(df, ab):
subset = df[(df["ab"] == ab)]
unique_sites = list(subset["site"].unique())
return unique_sites
abs = ["HENV-26", "HENV-103", "HENV-32", "HENV-117", "m102.4", "nAH1.3"]
sites_dict = {} # Create an empty dictionary to store the results
for ab in abs:
sites_dict[ab] = identify_escape_sites(combined_df.query('top_escape == True'), ab)
display(sites_dict) # need site dict for later
{'HENV-26': [491, 494, 501, 529, 530],
'HENV-103': [205, 258, 259, 260, 264],
'HENV-32': [199, 200, 205],
'HENV-117': [171, 258, 555, 580, 582, 583, 586, 587],
'm102.4': [239, 507, 559, 577, 582, 586, 589],
'nAH1.3': [184,
185,
186,
187,
188,
189,
190,
447,
448,
449,
450,
451,
452,
468,
513,
515,
516,
517,
518,
519,
520,
597]}
Plot bubble chart showing mAb escape for individual mutants by functional score for both E2 or E3¶
In [16]:
order_ab = ["m102.4", "HENV-26", "HENV-117", "HENV-103", "HENV-32", "nAH1.3"]
def generate_chart(df):
variant_selector = alt.selection_point(
on="mouseover", empty=False, fields=["site"], value=1
)
chart = (
alt.Chart(
df,
title=alt.Title(
"Top Antibody Escape Mutations",
subtitle="Hover over points to see escape at same site",
),
)
.mark_point(stroke="black")
.encode(
x=alt.X(
"ab:O",
sort=order_ab,
title="Antibody",
axis=alt.Axis(labelAngle=-90, grid=False),
),
y=alt.Y(
"effect:Q",
title="Cell Entry of Top Escape",
axis=alt.Axis(
grid=True, tickCount=4, values=[0.5, 0, -0.5, -1, -1.5, -2]
),
),
size=alt.Size(
"escape_mean", legend=alt.Legend(title="Mean Escape By Mutation")
),
xOffset="random:Q",
tooltip=[
"site",
"wildtype",
"mutant",
"ab",
"effect",
"escape_mean",
"escape_std",
],
color=alt.Color("ab").legend(None),
opacity=alt.condition(variant_selector, alt.value(1), alt.value(0.2)),
strokeWidth=alt.condition(variant_selector, alt.value(2), alt.value(0)),
)
.transform_calculate(
random="sqrt(-1*log(random()))*cos(2*PI*random())"
)
.properties(width=config["bubble_width"], height=config["bubble_height"])
.add_params(variant_selector)
)
return chart
escape_bubble = generate_chart(combined_df.query('top_escape == True'))
escape_bubble.display()
if mab_line_escape_plot is not None:
escape_bubble.save(escape_bubble_plot)
Now summarize by number of mutations between wildtype and mutant codons¶
In [17]:
def generate_chart_all(df):
variant_selector = alt.selection_point(
on="mouseover", empty=False, fields=["site"], value=1
)
radio = alt.binding_radio(
options=[1, 2, 3], labels=["1", "2", "3"], name="Min Mutations:"
)
mutation_selector = alt.param(name="MutationSelector", value=1, bind=radio)
slider = alt.binding_range(min=0.2, max=1.6, step=0.1, name="escape_mean")
selector = alt.param(name="SelectorName", value=0.2, bind=slider)
chart = (
alt.Chart(
df,
title=alt.Title(
"Antibody Escape Mutations",
subtitle="Hover over points to see escape at same site",
),
)
.mark_point(filled=True, stroke="black")
.encode(
x=alt.X(
"ab:O",
sort=order_ab,
title="Antibody",
axis=alt.Axis(labelAngle=-90, grid=False),
),
y=alt.Y(
"effect:Q",
title="Cell Entry of Top Escape",
axis=alt.Axis(
grid=True, tickCount=4, values=[0.5, 0, -0.5, -1, -1.5, -2]
),
),
size=alt.Size(
"escape_mean", legend=alt.Legend(title="Mean Escape By Mutation")
),
xOffset="random:Q",
tooltip=[
"site",
"wildtype",
"mutant",
"ab",
"effect",
"escape_mean",
"escape_std",
],
color=alt.Color("ab").legend(None),
opacity=alt.condition(variant_selector, alt.value(1), alt.value(0.4)),
strokeWidth=alt.condition(variant_selector, alt.value(2), alt.value(0)),
)
.transform_calculate(
random="sqrt(-1*log(random()))*cos(2*PI*random())"
# random='random'
)
#.properties(width=config["bubble_width"], height=config["bubble_height"])
.properties(width=200,height=250)
.add_params(variant_selector, mutation_selector, selector)
.transform_filter(
(alt.datum.min_mutations == mutation_selector)
& (alt.datum.escape_mean > selector)
)
)
return chart
#all_escape = generate_chart_all(combined_df.query("escape_median >= 0.2"))
all_escape = generate_chart_all(combined_df.query('top_escape == True'))
all_escape.display()
In [18]:
# Make combined figure
combined_bubble_plots = (escape_bubble | all_escape)
combined_bubble_plots.display()
In [19]:
def plot_escape_and_mutations_away(df):
variant_selector = alt.selection_point(
on="mouseover", empty=False, fields=["site"], value=1
)
radio = alt.binding_radio(options=[1, 2, 3], name="Min Mutations:")
mutation_selector = alt.param(name="MutationSelector", value=1, bind=radio)
chart = (
alt.Chart(
df,
title=alt.Title(
"Top Antibody Escape Mutations",
subtitle="By # of nucleotide mutations away",
),
)
.mark_point(filled=True, stroke="black",strokeWidth=0.75,opacity=0.3)
.encode(
x=alt.X(
"ab:O",
sort=order_ab,
title='Antibody',
axis=alt.Axis(labelAngle=-90, grid=False),
),
y=alt.Y(
"effect:Q",
title="Cell Entry of Escape Mutants",
axis=alt.Axis(
grid=True, tickCount=4, values=[0.5, 0, -0.5, -1, -1.5, -2]
),
), # 'Q' denotes a quantitative variable
size=alt.Size("escape_mean", legend=alt.Legend(title="Escape of Mutant")),
xOffset="random:Q",
tooltip=["ab", "effect", "escape_mean", "site", "mutant"],
#opacity=alt.condition(variant_selector, alt.value(1), alt.value(0.4)),
#strokeWidth=alt.condition(variant_selector, alt.value(2), alt.value(0)),
color=alt.Color("ab:N").legend(None),
)
.transform_calculate(
# random='random()'
random="sqrt(-2*log(random()))*cos(2*PI*random())"
)
.properties(width=200, height=config["bubble_height"])
.add_params(variant_selector, mutation_selector)
.transform_filter((alt.datum.min_mutations == mutation_selector))
)
return chart
bubble_plot_1_mut_away = plot_escape_and_mutations_away(combined_df.query('top_escape == True'))
bubble_plot_1_mut_away.display()
if mab_line_escape_plot is not None:
bubble_plot_1_mut_away.save(bubble_1_mut_plot)
In [20]:
def plot_escape_and_mutations_away_summed(df):
variant_selector = alt.selection_point(
on="mouseover", empty=False, fields=["site"], value=1
)
aggregated_df = df.groupby(['site','ab']).agg({
'escape_mean': 'sum',
'wildtype': 'first',
#'ab': 'first',
'effect': 'median',
}).reset_index()
aggregated_df = aggregated_df.query('escape_mean > 2')
#display(aggregated_df)
chart = (
alt.Chart(
aggregated_df,
)
.mark_point(filled=True, stroke="black",strokeWidth=0.75,opacity=0.3)
.encode(
x=alt.X(
"ab:O",
sort=order_ab,
title='Antibody',
axis=alt.Axis(labelAngle=-90, grid=True),
),
y=alt.Y(
"effect:Q",
title="Cell Entry of Escape Mutants",
axis=alt.Axis(
grid=True, tickCount=4, #values=[0.5, 0, -0.5, -1, -1.5, -2]
),
),
size=alt.Size("escape_mean", legend=alt.Legend(title="Escape Score")),
xOffset="random:Q",
tooltip=["ab", "effect", "escape_mean", "site"],
#opacity=alt.condition(variant_selector, alt.value(1), alt.value(0.4)),
#strokeWidth=alt.condition(variant_selector, alt.value(2), alt.value(0)),
color=alt.Color("ab:N").legend(None),
)
.transform_calculate(
# random='random()'
random="sqrt(-2*log(random()))*cos(2*PI*random())"
)
.properties(width=150, height=config["bubble_height"])
.add_params(variant_selector)
)
return chart
bubble_plot_1_mut_away = plot_escape_and_mutations_away_summed(combined_df)
bubble_plot_1_mut_away.display()
#if mab_line_escape_plot is not None:
# bubble_plot_1_mut_away.save(bubble_1_mut_plot)
In [21]:
def find_overlapping_escape(df):
slider = alt.binding_range(
min=config["min_func_effect_for_ab"], max=0, step=0.25, name="effect"
)
selector = alt.param(name="SelectorName", value=-4, bind=slider)
radio = alt.binding_radio(options=[1, 2, 3], name="Min Mutations:")
mutation_selector = alt.param(name="MutationSelector", value=1, bind=radio)
df_filtered = df
# Group by 'site' and 'mutant', count the unique 'ab' values for each group
grouped = df_filtered.groupby(["site", "mutant"])["ab"].nunique().reset_index()
# Filter groups where the count of unique 'ab' values is at least 2
result = grouped[grouped["ab"] >= 2]
# Merge the result with the original dataframe to get the full rows
df_result = pd.merge(df, result[["site", "mutant"]], on=["site", "mutant"])
df_result["mutation_number"] = (
df_result["mutation"].str.extract("(\d+)").astype(int)
)
base = (
(
alt.Chart(df_result, title=alt.Title("Shared antibody escape mutations"))
.mark_rect()
.encode(
x=alt.X(
"mutation:O",
title="Site",
sort=alt.EncodingSortField(field="mutation_number"),
axis=alt.Axis(labelAngle=-90, grid=False),
),
y=alt.Y(
"ab:O", title="Mutant", sort=order_ab, axis=alt.Axis(grid=False)
), # Apply custom sort order here
color="escape_mean",
tooltip=[
"site",
"wildtype",
"mutant",
"escape_mean",
"min_mutations",
],
)
)
.properties(height=200, width=400)
.add_params(selector, mutation_selector)
.transform_filter(
(alt.datum.effect >= selector)
& (alt.datum.min_mutations == mutation_selector)
)
)
return base
overlap_escape = find_overlapping_escape(combined_df.query('top_escape == True'))
overlap_escape.display()
if mab_line_escape_plot is not None:
overlap_escape.save(overlap_escape_plot)
Line plots of escape¶
In [22]:
def plot_line_escape(df):
variant_selector = alt.selection_point(
on="mouseover", empty=False, fields=["site"], value=0
)
# Group by 'site' and 'mutant', count the unique 'ab' values for each group
summed = df.groupby(["site", "ab"])["escape_mean"].mean().reset_index().round(3)
# Need to add dummy row because site 500 has been completely masked out due to low entry scores and not showing up on x-axis
new_row = pd.DataFrame({'site': [500], 'ab': ['HENV-103'], 'escape_mean': [0]})
summed = pd.concat([summed, new_row], ignore_index=True)
empty_chart = []
ab_list = ["m102.4", "HENV-26", "HENV-117", "HENV-103", "HENV-32", "nAH1.3"]
for idx, ab in enumerate(ab_list):
tmp_df = summed[summed["ab"] == ab]
# color = '#1f4e79'
if ab in ["m102.4", "HENV-26", "HENV-117"]:
color = "#1f4e79"
if ab in ["HENV-103", "HENV-32"]:
color = "#ff7f0e"
if ab in ["nAH1.3"]:
color = "#2ca02c"
# Conditionally set the x-axis labels and title for the last plot
is_last_plot = idx == len(ab_list) - 1
x_axis = alt.Axis(
values=[100, 200, 300, 400, 500, 600],
tickCount=6,
labelAngle=-90,
grid=True,
labelExpr="datum.value % 100 === 0 ? datum.value : ''",
title="Site" if is_last_plot else None,
labels=is_last_plot,
) # Only show labels for the last plot
base = (
alt.Chart(tmp_df)
.mark_line(size=1, color=color)
.encode(
x=alt.X("site:O", axis=x_axis),
y=alt.Y("escape_mean", title=f"{ab}", axis=alt.Axis(grid=True)),
)
.properties(
width=config["large_line_width"], height=config["large_line_height"]
)
)
point = (
base.mark_point(color="black", size=10, filled=True)
.encode(
x=alt.X("site:O", axis=x_axis),
y=alt.Y(
"escape_mean",
title=f"{ab}",
axis=alt.Axis(
grid=True,
),
),
size=alt.condition(variant_selector, alt.value(100), alt.value(15)),
color=alt.condition(
variant_selector, alt.value("black"), alt.value(color)
),
tooltip=["site", "escape_mean"],
)
.add_params(variant_selector)
)
chart = base + point
empty_chart.append(chart)
# Use configure_concat to adjust spacing between vertically concatenated plots
combined_chart = (
alt.vconcat(*empty_chart, spacing=1)
.resolve_scale(y="independent", x="shared", color="independent")
.properties(
title=alt.Title(
"Summed Antibody Escape by Site", subtitle="Colored by epitope"
)
)
)
return combined_chart
tmp_line = plot_line_escape(combined_df)
tmp_line.display()
if mab_line_escape_plot is not None:
tmp_line.save(mab_line_escape_plot)
Now calculate atomic distances between escape sites and closest amino acid in heavy and light chains¶
In [23]:
def calculate_min_distances(pdb_path, source_chain_id, target_chain_ids, name):
# Initialize the PDB parser and load the structure
parser = PDB.PDBParser(QUIET=True)
structure = parser.get_structure("structure_id", pdb_path)
source_chain = structure[0][source_chain_id]
target_chains = [structure[0][chain_id] for chain_id in target_chain_ids]
data = []
for residueA in source_chain:
if residueA.resname in ["HOH", "WAT", "IPA", "NAG"]:
continue
min_distance = float("inf")
closest_residueB = None
closest_chain_id = None
residues_within_4 = 0
for target_chain in target_chains:
for residueB in target_chain:
if residueB.resname in ["HOH", "WAT", "IPA"]:
continue
# Check for residues within 4 angstroms
is_within_4 = False
for atomA in residueA:
for atomB in residueB:
distance = atomA - atomB
if distance < min_distance:
min_distance = distance
closest_residueB = residueB
closest_chain_id = target_chain.get_id()
if distance < 4:
is_within_4 = True
if is_within_4:
residues_within_4 += 1
data.append(
{
"wildtype": residueA.resname,
"site": residueA.id[1],
"chain": closest_chain_id,
"residue": closest_residueB.id[1],
"residue_name": closest_residueB.resname,
"distance": min_distance,
"residues_within_4": residues_within_4,
"ab": name,
}
)
# Convert data to pandas DataFrame
df = pd.DataFrame(data)
return df
def check_file(input_path, source_chain, target_chain, name, output_path):
file_path = output_path
if not os.path.exists(file_path):
print(f"File {name} does not exist, running calculation")
output_df = calculate_min_distances(
input_path, source_chain, target_chain, name
)
print(f"done calculating for {file_path}")
output_df.to_csv(output_path, index=False)
return output_df
else:
print("File already exists,loading from disk")
output_df = pd.read_csv(output_path)
return output_df
pdb_path_26 = "data/custom_analyses_data/crystal_structures/6vy5.pdb"
source_chain_26 = "A"
target_chains_26 = ["H", "L"]
output_path_26 = "results/distances/df_HENV26_atomic_distances.csv"
pdb_path_32 = "data/custom_analyses_data/crystal_structures/6vy4.pdb"
source_chain_32 = "A"
target_chains_32 = ["H", "L"]
output_path_32 = "results/distances/df_HENV32_atomic_distances.csv"
pdb_path_nah = "data/custom_analyses_data/crystal_structures/7txz.pdb"
source_chain_nah = "A"
target_chains_nah = ["F", "E"]
output_path_nah = "results/distances/df_nAH_atomic_distances.csv"
pdb_path_m102 = "data/custom_analyses_data/crystal_structures/6cmg.pdb"
source_chain_m102 = "A"
target_chains_m102 = ["B", "C"]
output_path_m102 = "results/distances/df_m102_atomic_distances.csv"
df_HENV26 = check_file(
pdb_path_26, source_chain_26, target_chains_26, "HENV-26", output_path_26
)
df_HENV32 = check_file(
pdb_path_32, source_chain_32, target_chains_32, "HENV-32", output_path_32
)
df_nah = check_file(
pdb_path_nah, source_chain_nah, target_chains_nah, "nAH1.3", output_path_nah
)
df_nah["chain"].replace(
{"E": "H", "F": "L"}, inplace=True
) # Fix naming so consistent heavy and light chain naming
df_m102 = check_file(
pdb_path_m102, source_chain_m102, target_chains_m102, "m102.4", output_path_m102
)
df_m102["chain"].replace(
{"C": "H", "B": "L"}, inplace=True
) # Fix naming so consistent heavy and light chain naming
File already exists,loading from disk File already exists,loading from disk File already exists,loading from disk File already exists,loading from disk
In [24]:
def find_close_mab_sites(df, name):
unique_sites = df.query("distance <= 4")["site"].unique()
mab_site_list = list(unique_sites)
print(f"Close sites for mAb {name} are: {mab_site_list}")
return mab_site_list
### First find RBP sites that are close to mAb residues
nah_close = find_close_mab_sites(df_nah, "nAH1.3")
HENV26_close = find_close_mab_sites(df_HENV26, "HENV-26")
HENV32_close = find_close_mab_sites(df_HENV32, "HENV-32")
m102_close = find_close_mab_sites(df_m102, "m102.4")
### Now combined the close residues AND the top escape sites identified previously
nah_combined_sites = sites_dict["nAH1.3"] + nah_close
HENV26_combined_sites = sites_dict["HENV-26"] + HENV26_close
HENV32_combined_sites = sites_dict["HENV-32"] + HENV32_close
m102_combined_sites = sites_dict["m102.4"] + m102_close
Close sites for mAb nAH1.3 are: [172, 183, 184, 185, 186, 187, 188, 190, 191, 358, 449, 450, 451, 472, 515, 516, 517, 518, 570] Close sites for mAb HENV-26 are: [389, 401, 403, 404, 458, 488, 489, 490, 491, 492, 494, 497, 501, 504, 505, 506, 528, 529, 530, 531, 532, 533, 555, 556, 557, 581, 586] Close sites for mAb HENV-32 are: [196, 199, 200, 201, 202, 203, 205, 206, 207, 210, 254, 256, 258, 260, 262, 263, 264, 266, 553] Close sites for mAb m102.4 are: [239, 240, 241, 242, 305, 458, 488, 489, 490, 504, 505, 506, 507, 530, 532, 533, 555, 557, 559, 579, 580, 581, 588]
In [25]:
def make_distance(df):
subset_df = df[["site", "distance"]].copy()
subset_df["mutant"] = "distance"
subset_df["wildtype"] = ""
subset_df["effect"] = "escape_mean"
subset_df.rename(columns={"distance": "value"}, inplace=True)
return subset_df
distance_nah_df = make_distance(df_nah)
distance_26_df = make_distance(df_HENV26)
distance_32_df = make_distance(df_HENV32)
distance_m102_df = make_distance(df_m102)
display(distance_m102_df)
| site | value | mutant | wildtype | effect | |
|---|---|---|---|---|---|
| 0 | 176 | 35.044434 | distance | escape_mean | |
| 1 | 177 | 31.866014 | distance | escape_mean | |
| 2 | 178 | 27.842815 | distance | escape_mean | |
| 3 | 179 | 28.777035 | distance | escape_mean | |
| 4 | 180 | 28.332012 | distance | escape_mean | |
| ... | ... | ... | ... | ... | ... |
| 423 | 599 | 30.802711 | distance | escape_mean | |
| 424 | 600 | 28.920950 | distance | escape_mean | |
| 425 | 601 | 27.248772 | distance | escape_mean | |
| 426 | 602 | 29.020868 | distance | escape_mean | |
| 427 | 603 | 27.945621 | distance | escape_mean |
428 rows × 5 columns
Prepare dataframes for heatmaps¶
In [26]:
def make_empty_df_with_distance(ab, distance_file):
# print(ab)
sites = range(71, 603)
amino_acids = [
"A",
"C",
"D",
"E",
"F",
"G",
"H",
"I",
"K",
"L",
"M",
"N",
"P",
"Q",
"R",
"S",
"T",
"V",
"W",
"Y",
]
# Create the combination of each site with each amino acid
data = [{"site": site, "mutant": aa} for site in sites for aa in amino_acids]
# Create the DataFrame
empty_df = pd.DataFrame(data)
all_sites_df = pd.merge(
empty_df, combined_df.query(f'ab == "{ab}"'), on=["site", "mutant"], how="left"
)
df_melted = all_sites_df.melt(
id_vars=["site", "mutant", "wildtype"],
value_vars=["escape_mean"],
var_name="effect",
value_name="value",
)
df_filtered = func_scores_E3_low_effect.melt(
id_vars=["site", "mutant", "wildtype"],
value_vars=["effect"],
var_name="effect",
value_name="value",
)
df_test = pd.concat([df_melted, df_filtered, distance_file], ignore_index=True)
df_test["ab"] = ab
return df_test
empty_df_m102 = make_empty_df_with_distance("m102.4", distance_m102_df)
empty_df_HENV26 = make_empty_df_with_distance("HENV-26", distance_26_df)
empty_df_HENV32 = make_empty_df_with_distance("HENV-32", distance_32_df)
empty_df_nah = make_empty_df_with_distance("nAH1.3", distance_nah_df)
def make_empty_df(ab):
sites = range(71, 603)
amino_acids = [
"A",
"C",
"D",
"E",
"F",
"G",
"H",
"I",
"K",
"L",
"M",
"N",
"P",
"Q",
"R",
"S",
"T",
"V",
"W",
"Y",
]
# Create the combination of each site with each amino acid
data = [{"site": site, "mutant": aa} for site in sites for aa in amino_acids]
# Create the DataFrame
empty_df = pd.DataFrame(data)
all_sites_df = pd.merge(
empty_df, combined_df.query(f'ab == "{ab}"'), on=["site", "mutant"], how="left"
)
df_melted = all_sites_df.melt(
id_vars=["site", "mutant", "wildtype"],
value_vars=["escape_mean"],
var_name="effect",
value_name="value",
)
df_filtered = func_scores_E3_low_effect.melt(
id_vars=["site", "mutant", "wildtype"],
value_vars=["effect"],
var_name="effect",
value_name="value",
)
df_test = pd.concat([df_melted, df_filtered], ignore_index=True)
df_test["ab"] = ab
return df_test
empty_df_HENV117 = make_empty_df("HENV-117")
empty_df_HENV103 = make_empty_df("HENV-103")
combined_ab = pd.concat(
[
empty_df_m102,
empty_df_HENV26,
empty_df_HENV32,
empty_df_nah,
empty_df_HENV117,
empty_df_HENV103,
]
)
display(combined_ab)
| site | mutant | wildtype | effect | value | ab | |
|---|---|---|---|---|---|---|
| 0 | 71 | A | NaN | escape_mean | NaN | m102.4 |
| 1 | 71 | C | Q | escape_mean | 0.29 | m102.4 |
| 2 | 71 | D | Q | escape_mean | -0.02 | m102.4 |
| 3 | 71 | E | Q | escape_mean | 0.02 | m102.4 |
| 4 | 71 | F | Q | escape_mean | 0.00 | m102.4 |
| ... | ... | ... | ... | ... | ... | ... |
| 13320 | 597 | T | I | effect | -3.26 | HENV-103 |
| 13321 | 598 | C | P | effect | -2.08 | HENV-103 |
| 13322 | 598 | I | P | effect | -2.01 | HENV-103 |
| 13323 | 598 | W | P | effect | -3.04 | HENV-103 |
| 13324 | 601 | G | C | effect | -2.05 | HENV-103 |
81680 rows × 6 columns
In [27]:
def plot_distance_only(df, trigger):
custom_order = [
"distance",
"R",
"K",
"H",
"D",
"E",
"Q",
"N",
"S",
"T",
"Y",
"W",
"F",
"A",
"I",
"L",
"M",
"V",
"G",
"P",
"C",
]
all_residues = range(71, 603)
final_df = df
final_df = final_df.sort_values(
"site"
) # Sort the dataframe by 'site' to ensure that duplicates are detected correctly.
sort_order = {
mutant: i for i, mutant in enumerate(custom_order)
} # Create a dictionary that maps each mutant to its sort rank based on the custom order
final_df["mutant_rank"] = final_df["mutant"].map(
sort_order
) # Map the 'mutant' column to these ranks
final_df = final_df.sort_values(
"mutant_rank"
) # Now sort the dataframe by this rank
final_df = final_df.drop(
columns=["mutant_rank"]
) # Drop the 'mutant_rank' column as it is no longer needed after sorting
sites = sorted(final_df["site"].unique(), key=lambda x: float(x))
ab_list = ["m102.4", "HENV-26", "HENV-117", "HENV-103", "HENV-32", "nAH1.3"]
empty_chart = [] # setup collection for charts
for idx, ab in enumerate(ab_list):
tmp_df = final_df[final_df["ab"] == ab]
if ab == "m102.4":
site_subset = m102_combined_sites
# legend_conditional = alt.Legend(title='Distance to mAb')
if ab == "HENV-26":
site_subset = HENV26_combined_sites
# legend_conditional = alt.Legend(title='Distance to mAb')
if ab == "HENV-32":
site_subset = HENV32_combined_sites
# legend_conditional = alt.Legend(title='Distance to mAb')
if ab == "HENV-103":
site_subset = sites_dict["HENV-103"]
# legend_conditional = alt.Legend(title=None)
if ab == "HENV-117":
site_subset = sites_dict["HENV-117"]
# legend_conditional = alt.Legend(title=None)
if ab == "nAH1.3":
site_subset = nah_combined_sites
# legend_conditional = alt.Legend(title='Distance to mAb')
# select which sites you will show
if trigger == True:
tmp_df = tmp_df[tmp_df["site"].isin(site_subset)]
x_axis = alt.Axis(
labelAngle=-90,
# labelExpr="datum.value % 10 === 0 ? datum.value : ''",
title="Site",
)
else:
tmp_df = tmp_df[tmp_df["site"].isin(all_residues)]
# Conditionally set the x-axis labels and title for the last plot
is_last_plot = idx == len(ab_list) - 1
x_axis = alt.Axis(
labelAngle=-90,
labelExpr="datum.value % 10 === 0 ? datum.value : ''",
title="Site" if is_last_plot else None,
labels=True,
) # Only show labels for the last plot
# Prepare the color scales separately for distance and effects
# Filter out 'distance' values before creating the effect heatmap
effect_df = tmp_df[
(tmp_df["mutant"] != "distance") & (tmp_df["effect"] != "effect")
]
max_color = effect_df["value"].max()
min_color = effect_df["value"].min()
# Adjust color scheme for abs with little sensitizing mutations
if min_color > -1:
min_color = min_color - 1
# Prepare the color scale for effects, Altair will automatically determine the domain
color_scale_escape = alt.Scale(
scheme="redblue", domainMid=0, domain=[min_color, max_color]
)
color_scale_entropy = alt.Scale(scheme="greens", domain=[0, 15], reverse=True)
strokewidth_size = 0.25
unique_wildtypes_df = tmp_df.drop_duplicates(subset=["site", "wildtype"])
# The chart for the heatmap
base = (
alt.Chart(tmp_df, title=f"{ab}")
.encode(
x=alt.X("site:O", title="Site", sort=sites, axis=x_axis),
y=alt.Y(
"mutant",
title="Amino Acid",
sort=alt.EncodingSortField(field="sort_order", order="ascending"),
axis=alt.Axis(grid=False),
), # Apply custom sort order here
tooltip=["site", "wildtype", "mutant", "value"],
)
.properties(width=alt.Step(10), height=alt.Step(11))
)
# Heatmap for distance
chart_empty = (
base.mark_rect(color="#e6e7e8")
.encode()
.transform_filter(alt.datum.effect == "escape_mean")
)
# Heatmap for effect
chart_effect = (
base.mark_rect(stroke="black", strokeWidth=strokewidth_size)
.encode(
color=alt.condition(
'datum.mutant != "distance"',
alt.Color(
"value:Q",
scale=color_scale_escape,
legend=alt.Legend(title=f"{ab} Escape"),
),
alt.value("transparent"),
),
)
.transform_filter(alt.datum.effect == "escape_mean")
)
# Heatmap for distance
if ab in ["m102.4", "HENV-26", "HENV-32", "nAH1.3"]:
chart_distance = (
base.mark_rect()
.encode(
color=alt.condition(
'datum.mutant == "distance"',
alt.Color(
"value:Q",
scale=color_scale_entropy,
legend=alt.Legend(title="Distance to mAb"),
),
alt.value("transparent"),
)
)
.transform_filter(alt.datum.effect == "escape_mean")
)
else:
chart_distance = (
base.mark_rect(color="transparent")
.encode(
# color=alt.Color('white'),
# alt.Color('value:Q', scale=color_scale_entropy,legend=alt.Legend(title='Distance to mAb')),
# alt.value('transparent'))
)
.transform_filter(alt.datum.effect == "escape_mean")
)
# Heatmap for distance
chart_filtered = (
base.mark_rect(
color="#939598", stroke="black", strokeWidth=strokewidth_size
)
.encode()
.transform_filter(alt.datum.effect == "effect")
)
# The layer for the wildtype boxes
wildtype_layer_box = (
alt.Chart(unique_wildtypes_df)
.mark_rect(color="white", stroke="black", strokeWidth=strokewidth_size)
.encode(
x=alt.X("site:O", sort=sites),
y=alt.Y(
"wildtype",
sort=alt.EncodingSortField(field="sort_order", order="ascending"),
),
opacity=alt.value(1),
)
.transform_filter(
(alt.datum.wildtype != "")
& (alt.datum.wildtype != None)
& (alt.datum.value != None)
)
)
# The layer for the wildtype amino acids
wildtype_layer = (
alt.Chart(unique_wildtypes_df)
.mark_text(color="black", text="X", size=8)
.encode(
x=alt.X("site:O", sort=sites),
y=alt.Y(
"wildtype",
sort=alt.EncodingSortField(field="sort_order", order="ascending"),
),
opacity=alt.value(1),
)
.transform_filter(
(alt.datum.wildtype != "")
& (alt.datum.wildtype != None)
& (alt.datum.value != None)
)
)
# Combine the heatmap layer with the wildtype layer
chart = alt.layer(
chart_empty,
chart_effect,
chart_distance,
chart_filtered,
wildtype_layer_box,
wildtype_layer,
).resolve_scale(color="independent")
empty_chart.append(chart)
combined_chart = (
alt.vconcat(*empty_chart, spacing=1)
.resolve_scale(y="shared", x="independent", color="independent")
.configure_title(
anchor="start", # Aligns the title to the left ('middle' for center, 'end' for right)
offset=10, # Adjusts the distance of the title from the chart
orient="top", # Positions the title at the top; use 'bottom' to position at the bottom
)
)
return combined_chart
mab_plot = plot_distance_only(combined_ab, True)
mab_plot.display()
if mab_line_escape_plot is not None:
mab_plot.save(mab_plot_top)
Make full antibody escape heatmaps¶
In [28]:
mab_all = plot_distance_only(combined_ab, False)
mab_all.display()
if mab_line_escape_plot is not None:
mab_all.save(mab_plot_all)
Now make heatmaps of antibody escape versus Ephrin Binding¶
First prepare data:
In [29]:
bind_df = pd.read_csv(binding_data)
binding_df = bind_df.groupby("site")["binding_mean"].mean().reset_index()
def make_empty_binding():
sites = range(71, 603)
data = [{"site": site} for site in sites]
empty_df = pd.DataFrame(data)
empty_df = pd.merge(empty_df, binding_df, on="site", how="left")
empty_df = empty_df.rename(columns={"binding_mean": "value"})
empty_df["effect"] = "escape_mean"
empty_df["ab"] = "Ephrin-B2 binding"
return empty_df
binding_empty = make_empty_binding()
escape_df = combined_df.groupby(["ab", "site"])["escape_mean"].median().reset_index()
def make_empty_df(ab):
sites = range(71, 603)
data = [{"site": site} for site in sites]
# Create the DataFrame
empty_df = pd.DataFrame(data)
all_sites_df = pd.merge(
empty_df, escape_df.query(f'ab == "{ab}"'), on=["site"], how="left"
)
df_melted = all_sites_df.melt(
id_vars=["site"],
value_vars=["escape_mean"],
var_name="effect",
value_name="value",
)
df_test = pd.concat([df_melted], ignore_index=True)
df_test["ab"] = ab
return df_test
ab_list = ["m102.4", "HENV-26", "HENV-117", "HENV-103", "HENV-32", "nAH1.3"]
# ab_list = ['HENV-32']
empty = []
for ab in ab_list:
tmp_df = make_empty_df(ab)
empty.append(tmp_df)
all_empties_df = pd.concat(empty, ignore_index=True)
all_empties_df = pd.concat([all_empties_df, binding_empty])
display(all_empties_df)
| site | effect | value | ab | |
|---|---|---|---|---|
| 0 | 71 | escape_mean | 0.010000 | m102.4 |
| 1 | 72 | escape_mean | 0.035000 | m102.4 |
| 2 | 73 | escape_mean | 0.040000 | m102.4 |
| 3 | 74 | escape_mean | -0.040000 | m102.4 |
| 4 | 75 | escape_mean | -0.020000 | m102.4 |
| ... | ... | ... | ... | ... |
| 527 | 598 | escape_mean | 0.162143 | Ephrin-B2 binding |
| 528 | 599 | escape_mean | -0.176667 | Ephrin-B2 binding |
| 529 | 600 | escape_mean | -0.107778 | Ephrin-B2 binding |
| 530 | 601 | escape_mean | 0.340000 | Ephrin-B2 binding |
| 531 | 602 | escape_mean | -0.020556 | Ephrin-B2 binding |
3724 rows × 4 columns
In [30]:
def make_heatmap_with_binding(df):
# Define the custom sort order directly in the encoding
sort_order = [
"NiV Polymorphism",
"Ephrin-B2 binding",
"m102.4",
"HENV-26",
"HENV-117",
"HENV-103",
"HENV-32",
"nAH1.3",
]
full_ranges = [
list(range(start, end))
for start, end in [(71, 181), (181, 291), (291, 401), (401, 511), (511, 603)]
]
# container to hold the charts
charts = []
color_scale_effect = alt.Scale(scheme="redblue", domainMid=0)
color_scale_binding = alt.Scale(scheme="redblue", domainMid=0)
for idx, subset in enumerate(full_ranges):
subset_df = df[df["site"].isin(subset)] # for the wrapping of sites
is_last_plot = idx == len(full_ranges) - 1
x_axis = alt.Axis(
labelAngle=-90,
labelExpr="datum.value % 10 === 0 ? datum.value : ''",
title="Site" if is_last_plot else None,
labels=True,
) # Only show labels for the last plot
effect_legend = (
alt.Legend(title="Antibody Escape") if is_last_plot else None
) # ,direction='horizontal',gradientLength=50,titleAnchor='middle',tickCount=3,labelAlign='center')
binding_legend = (
alt.Legend(title="Henipavirus Entropy") if is_last_plot else None
) # ,direction='horizontal',gradientLength=50,titleAnchor='middle',labelAlign='center')
print(is_last_plot)
print(effect_legend)
base = (
alt.Chart(subset_df)
.encode(
x=alt.X("site:O", title="Site", axis=x_axis),
y=alt.Y(
"ab", title=None, sort=sort_order, axis=alt.Axis(grid=False)
), # Correctly apply custom sort order
tooltip=["site", "value"],
)
.properties(width=alt.Step(10), height=alt.Step(11))
)
# Define the chart for empty cells
chart_empty = base.mark_rect(color="#e6e7e8").transform_filter(
alt.datum.effect == "escape_mean"
)
# Define the chart for cells with effect
chart_effect = (
base.mark_rect(stroke="black", strokeWidth=0.25)
.encode(
color=alt.condition(
'datum.effect == "escape_mean"',
alt.Color(
"value:Q", scale=color_scale_effect, legend=effect_legend
), # Define a color scale
alt.value("transparent"),
)
)
.transform_filter(alt.datum.effect == "escape_mean")
)
chart_binding = (
base.mark_rect(strokeWidth=1.1)
.encode(
stroke=alt.value("value"),
color=alt.condition(
'datum.effect == "escape_mean"',
alt.Color(
"value:Q", scale=color_scale_binding, legend=binding_legend
),
alt.value("transparent"),
),
)
.transform_filter(alt.datum.ab == "Ephrin-B2 binding")
)
chart_poly = (
base.mark_rect(color="black")
.encode()
.transform_filter(alt.datum.ab == "NiV Polymorphism")
)
# Layer the charts using `layer` instead of `+`
chart = alt.layer(
chart_empty, chart_effect, chart_binding, chart_poly
) # .resolve_scale(color='shared')
charts.append(chart)
combined_chart = alt.vconcat(
*charts, spacing=5, title="Heatmap of median mAb escape and Ephrin-B2 binding"
)
return combined_chart
# Assuming `all_empties_df` is your DataFrame and already defined
chart = make_heatmap_with_binding(all_empties_df)
chart.display()
if mab_line_escape_plot is not None:
chart.save(aggregate_mab_and_binding)
False
None
False
None
False
None
False
None
True
Legend({
title: 'Antibody Escape'
})
Now show heatmap with nipah polymorphisms¶
In [31]:
def make_contact():
df = pd.DataFrame({"site": niv_poly, "contact": [0.0] * len(niv_poly)})
df = df[["site", "contact"]]
# df['mutant'] = 'contact'
df["ab"] = "NiV Polymorphism"
df["effect"] = "escape_mean"
df.rename(columns={"contact": "value"}, inplace=True)
return df
niv_poly = config['nipah_poly']
contact_df = make_contact()
bind_df = pd.read_csv("results/filtered_data/binding/e2_binding_filtered.csv")
binding_df = bind_df.groupby("site")["binding_mean"].max().reset_index()
def make_empty_binding():
sites = range(71, 603)
data = [{"site": site} for site in sites]
empty_df = pd.DataFrame(data)
empty_df = pd.merge(empty_df, binding_df, on="site", how="left")
empty_df = empty_df.rename(columns={"binding_mean": "value"})
empty_df["effect"] = "escape_mean"
empty_df["ab"] = "Ephrin-B2 binding"
return empty_df
binding_empty = make_empty_binding()
escape_df = combined_df.groupby(["ab", "site"])["escape_mean"].max().reset_index()
def make_empty_df(ab):
sites = range(71, 603)
data = [{"site": site} for site in sites]
# Create the DataFrame
empty_df = pd.DataFrame(data)
all_sites_df = pd.merge(
empty_df, escape_df.query(f'ab == "{ab}"'), on=["site"], how="left"
)
df_melted = all_sites_df.melt(
id_vars=["site"],
value_vars=["escape_mean"],
var_name="effect",
value_name="value",
)
df_test = pd.concat([df_melted], ignore_index=True)
df_test["ab"] = ab
return df_test
ab_list = ["m102.4", "HENV-26", "HENV-117", "HENV-103", "HENV-32", "nAH1.3"]
empty = []
for ab in ab_list:
tmp_df = make_empty_df(ab)
empty.append(tmp_df)
all_empties_df = pd.concat(empty, ignore_index=True)
all_empties_df = pd.concat([all_empties_df, contact_df])
display(all_empties_df)
| site | effect | value | ab | |
|---|---|---|---|---|
| 0 | 71 | escape_mean | 0.42 | m102.4 |
| 1 | 72 | escape_mean | 0.09 | m102.4 |
| 2 | 73 | escape_mean | 0.34 | m102.4 |
| 3 | 74 | escape_mean | 0.07 | m102.4 |
| 4 | 75 | escape_mean | 0.11 | m102.4 |
| ... | ... | ... | ... | ... |
| 30 | 478 | escape_mean | 0.00 | NiV Polymorphism |
| 31 | 481 | escape_mean | 0.00 | NiV Polymorphism |
| 32 | 498 | escape_mean | 0.00 | NiV Polymorphism |
| 33 | 502 | escape_mean | 0.00 | NiV Polymorphism |
| 34 | 545 | escape_mean | 0.00 | NiV Polymorphism |
3227 rows × 4 columns
In [32]:
def make_heatmap_with_polymorphisms(df):
# Define the custom sort order directly in the encoding
sort_order = [
"NiV Polymorphism",
"m102.4",
"HENV-26",
"HENV-117",
"HENV-103",
"HENV-32",
"nAH1.3",
]
# full_ranges = [list(range(start, end)) for start, end in [(71, 204), (204, 337), (337, 470), (470, 603)]]
full_ranges = [
list(range(start, end))
for start, end in [(71, 181), (181, 291), (291, 401), (401, 511), (511, 603)]
]
# container to hold the charts
charts = []
color_scale_effect = alt.Scale(scheme="redblue", domainMid=0, domain=[0, 2])
color_scale_binding = alt.Scale(scheme="redblue", domainMid=0, domain=[-5, 2])
# Flags for showing the legend only the first time
effect_legend_added = True
binding_legend_added = True
for idx, subset in enumerate(full_ranges):
subset_df = df[df["site"].isin(subset)] # for the wrapping of sites
is_last_plot = idx == len(full_ranges) - 1
x_axis = alt.Axis(
labelAngle=-90,
labelExpr="datum.value % 10 === 0 ? datum.value : ''",
title="Site" if is_last_plot else None,
labels=True,
) # Only show labels for the last plot
base = (
alt.Chart(subset_df)
.encode(
x=alt.X("site:O", title="Site", axis=x_axis),
y=alt.Y(
"ab", title=None, sort=sort_order, axis=alt.Axis(grid=False)
), # Correctly apply custom sort order
tooltip=["site", alt.Tooltip("value", format=".2f")],
)
.properties(width=alt.Step(10), height=alt.Step(11))
)
# Define the chart for empty cells
chart_empty = base.mark_rect(color="#e6e7e8").transform_filter(
alt.datum.effect == "escape_mean"
)
if not effect_legend_added:
# Define the chart for cells with effect
chart_effect = (
base.mark_rect(stroke="black", strokeWidth=0.25)
.encode(
color=alt.condition(
'datum.effect == "escape_mean"',
alt.Color(
"value:Q", scale=color_scale_effect
), # Define a color scale
alt.value("transparent"),
)
)
.transform_filter(alt.datum.effect == "escape_mean")
)
effect_legend_added = True
else:
# Define the chart for cells with effect
chart_effect = (
base.mark_rect(stroke="black", strokeWidth=0.25)
.encode(
color=alt.condition(
'datum.effect == "escape_mean"',
alt.Color(
"value:Q", scale=color_scale_effect, legend=None
), # Define a color scale
alt.value("transparent"),
)
)
.transform_filter(alt.datum.effect == "escape_mean")
)
if not binding_legend_added:
chart_binding = (
base.mark_rect(strokeWidth=1.1)
.encode(
stroke=alt.value("value"),
color=alt.condition(
'datum.effect == "escape_mean"',
alt.Color("value:Q", scale=color_scale_binding),
alt.value("transparent"),
),
)
.transform_filter(alt.datum.ab == "Ephrin-B2 binding")
)
binding_legend_added = True
else:
chart_binding = (
base.mark_rect(strokeWidth=1.1)
.encode(
stroke=alt.value("value"),
color=alt.condition(
'datum.effect == "escape_mean"',
alt.Color("value:Q", scale=color_scale_binding, legend=None),
alt.value("transparent"),
),
)
.transform_filter(alt.datum.ab == "Ephrin-B2 binding")
)
chart_poly = (
base.mark_rect(color="black")
.encode()
.transform_filter(alt.datum.ab == "NiV Polymorphism")
)
# Layer the charts using `layer` instead of `+`
chart = alt.layer(chart_empty, chart_effect, chart_poly).resolve_scale(
color="independent"
)
charts.append(chart)
combined_chart = alt.vconcat(
*charts, spacing=5, title="Heatmap of max mAb escape and Nipah Polymorphisms"
).resolve_scale(y="shared", x="independent", color="shared")
return combined_chart
# Assuming `all_empties_df` is your DataFrame and already defined
chart = make_heatmap_with_polymorphisms(all_empties_df)
chart.display()
if mab_line_escape_plot is not None:
chart.save(aggregate_mab_and_niv_polymorphism)
Make a plot comparing escape at Hendra and Nipah substitutions¶
In [33]:
hendra_sites = [76, 82, 85, 86, 89, 90, 96, 98, 136, 144, 174, 175, 176, 180, 187, 194, 195, 196, 201, 209, 211, 212, 213, 215, 224, 226, 228, 236, 241, 244, 245, 261, 265, 277, 279, 280, 284, 285, 287, 288, 289, 293, 299, 309, 311, 312, 315, 316, 317, 322, 326, 327, 329, 333, 334, 335, 337, 338, 339, 340, 342, 344, 371, 376, 385, 386, 388, 392, 401, 402, 403, 404, 420, 422, 423, 424, 425, 426, 427, 432, 434, 437, 446, 458, 466, 470, 473, 476, 478, 483, 498, 502, 507, 517, 520, 525, 527, 538, 548, 549, 550, 553, 554, 564, 569, 571, 586, 599, 602]
cedar_sites = [71, 72, 73, 74, 75, 76, 77, 78, 80, 81, 82, 83, 84, 85, 86, 89, 92, 93, 94, 95, 96, 98, 99, 100, 102, 106, 108, 109, 110, 111, 112, 113, 115, 117, 118, 120, 121, 122, 123, 125, 126, 127, 129, 131, 132, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 147, 149, 150, 152, 155, 163, 164, 166, 167, 168, 169, 170, 171, 172, 173, 174, 176, 177, 178, 179, 180, 182, 184, 186, 187, 188, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 202, 204, 206, 207, 210, 211, 212, 213, 214, 215, 217, 218, 219, 223, 224, 225, 226, 228, 230, 232, 234, 236, 238, 241, 242, 243, 244, 246, 247, 248, 249, 250, 251, 252, 255, 256, 261, 262, 266, 267, 268, 269, 270, 271, 272, 273, 275, 276, 277, 278, 280, 281, 283, 284, 286, 287, 289, 290, 291, 292, 294, 296, 297, 299, 300, 301, 302, 303, 304, 305, 308, 310, 312, 313, 314, 315, 316, 318, 319, 320, 321, 322, 325, 326, 328, 329, 330, 332, 333, 340, 341, 342, 343, 346, 347, 348, 349, 350, 351, 353, 354, 356, 357, 358, 360, 361, 362, 363, 364, 366, 367, 368, 369, 370, 371, 372, 373, 374, 376, 377, 378, 379, 380, 383, 384, 385, 386, 387, 389, 391, 392, 394, 396, 397, 399, 400, 401, 402, 403, 406, 407, 408, 409, 410, 411, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 426, 427, 428, 429, 430, 432, 433, 434, 435, 438, 442, 443, 444, 445, 446, 447, 451, 452, 453, 456, 458, 463, 464, 466, 468, 470, 471, 472, 473, 475, 476, 477, 480, 489, 491, 492, 496, 497, 498, 504, 505, 507, 511, 512, 513, 514, 516, 517, 518, 519, 520, 521, 522, 523, 525, 529, 531, 536, 537, 541, 542, 543, 544, 548, 549, 550, 551, 552, 553, 554, 555, 556, 558, 559, 560, 562, 564, 568, 569, 570, 571, 572, 577, 578, 580, 581, 582, 583, 584, 585, 586, 587, 591, 592, 593, 594, 595, 599, 600]
def make_polymorphism_escape(df,sites_list,title_name):
df['is_poly'] = df['site'].isin(sites_list)
aggregated_df = df.groupby(['site', 'ab']).agg({
'escape_mean': 'mean',
'wildtype': 'first',
'is_poly': 'first'
}).reset_index()
aggregated_df['is_poly'] = aggregated_df['is_poly'].map({True: 'Polymorphic', False: 'Conserved'})
empty_chart = []
antibodies = ['m102.4', 'HENV-26', 'HENV-117', 'HENV-103', 'HENV-32', 'nAH1.3']
for index, ab in enumerate(antibodies):
tmp_df = aggregated_df[aggregated_df['ab'] == ab]
# Conditionally set the y-axis title for only the first chart
y_axis_title = 'Mean Escape' if index == 0 else None
base = alt.Chart(tmp_df).mark_point(size=30, opacity=1, filled=True,strokeWidth=0.75,stroke='black').encode(
x=alt.X("is_poly", title=None, axis=alt.Axis(labelAngle=-90, grid=False)),
xOffset="random:Q",
y=alt.Y("escape_mean", title=y_axis_title, axis=alt.Axis(grid=True, tickCount=2)),
tooltip=["site"],
color=alt.Color('is_poly',legend=None),
).transform_calculate(
random="sqrt(-2*log(random()))*cos(2*PI*random())"
).properties(
title={
"text": f'{ab}',
"fontSize": 10, # Adjust font size as needed
"align": "center",
'anchor': "middle"
},
width=60,
)
empty_chart.append(base)
combined_chart = alt.hconcat(*empty_chart, spacing=10,title=title_name).resolve_scale(y="shared", color="shared")
return combined_chart
nipah_poly_chart = make_polymorphism_escape(combined_df,config['nipah_poly'],'Antibody Escape at Nipah Polymorphic Sites')
nipah_poly_chart.display()
hendra_poly_chart = make_polymorphism_escape(combined_df, hendra_sites,'Hendra substitutions')
hendra_poly_chart.display()
cedar_poly_chart = make_polymorphism_escape(combined_df, cedar_sites,'Cedar substitutions')
cedar_poly_chart.display()
In [34]:
hendra_sites = [76, 82, 85, 86, 89, 90, 96, 98, 136, 144, 174, 175, 176, 180, 187, 194, 195, 196, 201, 209, 211, 212, 213, 215, 224, 226, 228, 236, 241, 244, 245, 261, 265, 277, 279, 280, 284, 285, 287, 288, 289, 293, 299, 309, 311, 312, 315, 316, 317, 322, 326, 327, 329, 333, 334, 335, 337, 338, 339, 340, 342, 344, 371, 376, 385, 386, 388, 392, 401, 402, 403, 404, 420, 422, 423, 424, 425, 426, 427, 432, 434, 437, 446, 458, 466, 470, 473, 476, 478, 483, 498, 502, 507, 517, 520, 525, 527, 538, 548, 549, 550, 553, 554, 564, 569, 571, 586, 599, 602]
cedar_sites = [71, 72, 73, 74, 75, 76, 77, 78, 80, 81, 82, 83, 84, 85, 86, 89, 92, 93, 94, 95, 96, 98, 99, 100, 102, 106, 108, 109, 110, 111, 112, 113, 115, 117, 118, 120, 121, 122, 123, 125, 126, 127, 129, 131, 132, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 147, 149, 150, 152, 155, 163, 164, 166, 167, 168, 169, 170, 171, 172, 173, 174, 176, 177, 178, 179, 180, 182, 184, 186, 187, 188, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 202, 204, 206, 207, 210, 211, 212, 213, 214, 215, 217, 218, 219, 223, 224, 225, 226, 228, 230, 232, 234, 236, 238, 241, 242, 243, 244, 246, 247, 248, 249, 250, 251, 252, 255, 256, 261, 262, 266, 267, 268, 269, 270, 271, 272, 273, 275, 276, 277, 278, 280, 281, 283, 284, 286, 287, 289, 290, 291, 292, 294, 296, 297, 299, 300, 301, 302, 303, 304, 305, 308, 310, 312, 313, 314, 315, 316, 318, 319, 320, 321, 322, 325, 326, 328, 329, 330, 332, 333, 340, 341, 342, 343, 346, 347, 348, 349, 350, 351, 353, 354, 356, 357, 358, 360, 361, 362, 363, 364, 366, 367, 368, 369, 370, 371, 372, 373, 374, 376, 377, 378, 379, 380, 383, 384, 385, 386, 387, 389, 391, 392, 394, 396, 397, 399, 400, 401, 402, 403, 406, 407, 408, 409, 410, 411, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 426, 427, 428, 429, 430, 432, 433, 434, 435, 438, 442, 443, 444, 445, 446, 447, 451, 452, 453, 456, 458, 463, 464, 466, 468, 470, 471, 472, 473, 475, 476, 477, 480, 489, 491, 492, 496, 497, 498, 504, 505, 507, 511, 512, 513, 514, 516, 517, 518, 519, 520, 521, 522, 523, 525, 529, 531, 536, 537, 541, 542, 543, 544, 548, 549, 550, 551, 552, 553, 554, 555, 556, 558, 559, 560, 562, 564, 568, 569, 570, 571, 572, 577, 578, 580, 581, 582, 583, 584, 585, 586, 587, 591, 592, 593, 594, 595, 599, 600]
def make_polymorphism_escape(df,sites_list,title_name):
df['is_poly'] = df['site'].isin(sites_list)
aggregated_df = df.groupby(['site', 'ab']).agg({
'escape_mean': 'mean',
'wildtype': 'first',
'is_poly': 'first'
}).reset_index()
aggregated_df['is_poly'] = aggregated_df['is_poly'].map({True: 'Polymorphic', False: 'Conserved'})
empty_chart = []
antibodies = ['m102.4', 'HENV-26', 'HENV-117', 'HENV-103', 'HENV-32', 'nAH1.3']
base = alt.Chart(aggregated_df.query('escape_mean > 0'),title='Antibody Escape at Nipah Polymorphic Sites').mark_point(size=50, opacity=1, filled=True,strokeWidth=0.75,stroke='black').encode(
y=alt.X("is_poly", title=None, axis=alt.Axis(grid=False)),
yOffset="random:Q",
x=alt.Y("escape_mean", axis=alt.Axis(grid=True, tickCount=3)),
tooltip=["site"],
color=alt.Color('is_poly'),
row=alt.Row('ab',sort=antibodies,title=None,header=None)
).transform_calculate(
random="sqrt(-2*log(random()))*cos(2*PI*random())"
).properties(width=300,height=20)
return base
nipah_poly_chart = make_polymorphism_escape(combined_df,config['nipah_poly'],'Antibody Escape at Nipah Polymorphic Sites')
nipah_poly_chart.display()
In [35]:
test = alt.vconcat(nipah_poly_chart,hendra_poly_chart,cedar_poly_chart)
test.display()
In [36]:
def make_polymorphism_escape(df):
df['is_poly'] = df['site'].isin(niv_poly)
aggregated_df = df.groupby(['site']).agg({
'escape_mean': 'mean',
'wildtype': 'first',
'is_poly': 'first'
}).reset_index()
empty_chart = []
#antibodies = ['m102.4', 'HENV-26', 'HENV-117', 'HENV-103', 'HENV-32', 'nAH1.3']
#for index, ab in enumerate(antibodies):
#tmp_df = aggregated_df[aggregated_df['ab'] == ab]
# Conditionally set the y-axis title for only the first chart
#y_axis_title = 'Summed Escape' if index == 0 else None
base = alt.Chart(aggregated_df).mark_point(size=25, opacity=0.2, filled=True,strokeWidth=0.5,stroke='black').encode(
x=alt.X("is_poly", title=None, axis=alt.Axis(labelAngle=-90, grid=False)),
xOffset="random:Q",
y=alt.Y("escape_mean", title=None, axis=alt.Axis(grid=True, tickCount=5)),
tooltip=["site"],
color=alt.Color('is_poly',title='Polymorphic Site'),
).transform_calculate(
random="sqrt(-2*log(random()))*cos(2*PI*random())"
).properties(
width=50,
)
# empty_chart.append(base)
# combined_chart = alt.hconcat(*empty_chart, spacing=10).resolve_scale(y="independent", color="shared")
return base
chart = make_polymorphism_escape(combined_df)
chart.display()
Make plots comparing escape with binding to see if escape sites do so by increasing binding¶
In [37]:
ab_list1 = ["m102.4", "HENV-26", "HENV-117"]
ab_list2 = ["HENV-103", "HENV-32"]
ab_list3 = ["nAH1.3"]
def plot_escape_vs_binding(df):
variant_selector = alt.selection_point(
on="mouseover", empty=False, nearest=True, fields=["site"], value=1
)
empty_chart1 = []
for ab in ab_list1:
tmp_df = df[df["ab"] == ab]
base = (
alt.Chart(tmp_df, title=alt.Title(f"{ab}", anchor="middle"))
.mark_point(
filled=True, size=15, color="#1f4e79", opacity=0.15, stroke="black"
)
.encode(
alt.X(
"binding_mean",
title="EFNB2 Binding",
axis=alt.Axis(grid=True, tickCount=3),
),
alt.Y(
"escape_mean",
title="Antibody Escape",
axis=alt.Axis(grid=True, tickCount=3),
),
tooltip=[
"site",
"wildtype",
"mutant",
"escape_mean",
"binding_mean",
],
opacity=alt.condition(variant_selector, alt.value(1), alt.value(0.2)),
size=alt.condition(variant_selector, alt.value(50), alt.value(20)),
strokeWidth=alt.condition(variant_selector, alt.value(2), alt.value(0)),
)
)
empty_chart1.append(base)
combined_chart1 = alt.hconcat(*empty_chart1, spacing=5).resolve_scale(
x="shared", y="shared"
)
empty_chart2 = []
for ab in ab_list2:
tmp_df = df[df["ab"] == ab]
base = (
alt.Chart(tmp_df, title=alt.Title(f"{ab}", anchor="middle"))
.mark_point(
filled=True, size=15, color="#ff7f0e", opacity=0.15, stroke="black"
)
.encode(
alt.X(
"binding_mean",
title="EFNB2 Binding",
axis=alt.Axis(grid=True, tickCount=3),
),
alt.Y(
"escape_mean",
title="Antibody Escape",
axis=alt.Axis(grid=True, tickCount=3),
),
tooltip=[
"site",
"wildtype",
"mutant",
"escape_mean",
"binding_mean",
],
opacity=alt.condition(variant_selector, alt.value(1), alt.value(0.2)),
size=alt.condition(variant_selector, alt.value(50), alt.value(20)),
strokeWidth=alt.condition(variant_selector, alt.value(2), alt.value(0)),
)
)
empty_chart2.append(base)
combined_chart2 = alt.hconcat(*empty_chart2, spacing=5).resolve_scale(
x="shared", y="shared"
)
empty_chart3 = []
for ab in ab_list3:
tmp_df = df[df["ab"] == ab]
base3 = (
alt.Chart(tmp_df, title=alt.Title(f"{ab}", anchor="middle"))
.mark_point(
filled=True, size=15, color="#2ca02c", opacity=0.15, stroke="black"
)
.encode(
alt.X(
"binding_mean",
title="EFNB2 Binding",
axis=alt.Axis(grid=True, tickCount=3),
),
alt.Y(
"escape_mean",
title="Antibody Escape",
axis=alt.Axis(grid=True, tickCount=3),
),
tooltip=[
"site",
"wildtype",
"mutant",
"escape_mean",
"binding_mean",
],
opacity=alt.condition(variant_selector, alt.value(1), alt.value(0.2)),
size=alt.condition(variant_selector, alt.value(50), alt.value(20)),
strokeWidth=alt.condition(variant_selector, alt.value(2), alt.value(0)),
)
)
combined_chart_total = alt.vconcat(
combined_chart1,
combined_chart2,
base3,
title=alt.Title(
"Antibody Escape versus Binding",
subtitle="Colored by Epitope. Hover over points to see the same sites",
),
).add_params(
variant_selector
)
return combined_chart_total
#tmp_img_test = plot_escape_vs_binding(combined_df)
#tmp_img_test.display()
#if mab_line_escape_plot is not None:
# tmp_img_test.save(binding_vs_escape)
In [ ]: